This online supplement reproduces all the figures and tables presented in the publication “Rethinking the Funding Line at the Swiss National Science Foundation: Bayesian Ranking and Lottery”. The expected rank (ER) methodology for grant peer review is fully implemented in the R package {ERforResearch}, which is available from GitHub together with its documentation. The package can be installed with the following code:
# Install ERforResearch from github:
# devtools::install_github("snsf-data/ERforResearch")
library(ERforResearch)
# Further packages used throughout this supplement (others, e.g. psych,
# openxlsx, cowplot, gridExtra, bayesplot, are called via :: below):
library(dplyr)      # data manipulation verbs and the pipe %>%
library(stringr)    # str_extract_all() in the plotting helper below
library(ggplot2)    # all figures
library(coda)       # mcmc() and gelman.diag()
library(here)       # paths to the JAGS model files
library(knitr)      # kable() tables
library(kableExtra) # kable_styling()
## Some helper functions for later:
# Function to extract the Intraclass correlation coefficient (ICC) from the
# matrix of individual votes:
get_icc_from_matrix <- function(individual_votes) {
(individual_votes %>%
select(-proposal) %>%
mutate_all(function(x) ERforResearch:::get_num_grade_snsf(x)) %>%
as.data.frame() %>%
psych::ICC(x = ., missing = FALSE))$results %>%
filter(type == "ICC3") %>% # A fixed set of k judges rates each target
mutate(icc = paste0(round(ICC, 2), " (",
round(`lower bound`, 2), "; ",
round(`upper bound`, 2), ")")) %>%
pull(icc)
}
# Function to graphically represent all the votes, and the ICC from the
# matrix of individual votes:
get_grades_plot <- function(long_individual_votes, individual_votes_mat = NULL,
x_min = NULL, x_max = NULL, title = "",
jitter_h = .02, jitter_w = .025,
jitter_alpha = .5){
if (is.null(x_min)){
x_min <- long_individual_votes %>% pull(num_grade) %>% min
x_min <- ifelse(x_min > 1, x_min - 1.2, x_min -.2)
}
if (is.null(x_max)){
x_max <- long_individual_votes %>% pull(num_grade) %>% max
x_max <- ifelse(x_max < 6, x_max + 1.2, x_max +.2)
}
if (!is.null(individual_votes_mat)){
icc <- get_icc_from_matrix(individual_votes_mat)
title <- paste0(ifelse(title == "", "ICC = ",
paste0(title, ": ICC = ")), icc)
}
plot_data <- long_individual_votes %>%
group_by(proposal) %>%
mutate(avg = mean(num_grade, na.rm = TRUE)) %>%
ungroup()
plot_data <- plot_data %>%
select(proposal, avg) %>%
distinct() %>%
arrange(avg) %>%
mutate(order = 1:n()) %>%
select(-avg) %>%
left_join(plot_data, by = "proposal")
plot_data %>%
ggplot(aes(x = num_grade, y = order)) +
geom_jitter(aes(color = "Individual Votes"), height = jitter_h,
width = jitter_w, alpha = jitter_alpha, size = 1) +
geom_point(aes(x = avg, color = "Average"), size = 1.5) +
theme_minimal() +
scale_y_continuous(breaks = unique(plot_data$order),
labels =
as.numeric(str_extract_all(unique(plot_data$proposal),
"[0-9]+"))) +
labs(x = "Assessor Vote", y = "Ordered proposal", title = title) +
lims(x = c(x_min, x_max)) +
scale_color_manual(name = " ",
values = c("Average"="#FF6F6F",
"Individual Votes" = "#848383")) +
theme(axis.text.y = element_blank(),
legend.position = c(0.5, .5),
axis.title.y = element_blank(),
panel.grid.major.y = element_line(size = .1))
}
# Bland-Altman Plot for Agreement:
get_agreement_plot <- function(rankings, rankings_ordinal, title, ymin = -.5,
ymax = .5, pt_alpha = .8, pt_size = 1){
difference <- rankings %>%
select(id_application, er) %>%
rename(er1 = er) %>%
left_join(rankings_ordinal %>%
select(id_application, er) %>%
rename(er2 = er),
by = "id_application") %>%
mutate(difference = er1 - er2,
avg = (er1 + er2)/2) %>%
select(id_application, difference, avg)
d <- difference %>%
summarise(mean(difference)) %>% pull
sd <- difference %>%
summarise(sd(difference)) %>% pull
difference %>%
ggplot(aes(x = avg, y = difference)) +
geom_point(alpha = pt_alpha, size = pt_size) +
geom_hline(yintercept = d, color = "gray") +
geom_hline(yintercept = d - 1.96 * sd, color = "darkred", lty = "dotted") +
geom_hline(yintercept = d + 1.96 * sd, color = "darkred", lty = "dotted") +
ylim(ymin, ymax) +
labs(y = "Difference in ER", x = "Average ER of both approaches",
title = title) +
snf.plot::snsf_theme # ggplot2 theme from the {snf.plot} package
}
# Percentile Loss Function
percentile_loss <- function(true_rank, estimated_rank, perc = .3, p = 2,
q = 2, c = 1){
I <- length(true_rank)
true_percentile <- true_rank / (I + 1)
estimated_percentile <- estimated_rank / (I + 1)
AB <- (true_percentile > perc) & (estimated_percentile < perc)
BA <- (true_percentile < perc) & (estimated_percentile > perc)
loss <- (sum(abs(perc - estimated_percentile)^p * AB +
c * abs(estimated_percentile - perc)^q * BA)) / I
return(loss)
}
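As a quick illustration, the following toy example (hypothetical ranks, not data from the case studies) compares a noisy estimated ranking of ten proposals to the true ranking, with the funding cut-off at the 30th percentile:
# Toy example with hypothetical ranks:
set.seed(42)
true_rank <- 1:10
estimated_rank <- rank(true_rank + rnorm(10, sd = 2))
percentile_loss(true_rank, estimated_rank, perc = .3)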
The main paper presents two major case studies. The Bayesian Ranking (BR) methodology is applied to give funding recommendations for two funding instruments at the Swiss National Science Foundation (SNSF): the Postdoc.Mobility fellowship for early-career researchers and the SNSF Project Funding scheme. The data for both case studies is available on Zenodo and can be accessed directly from R using the following code chunk:
# Load the data from Zenodo:
path_to_xlsx <- "https://zenodo.org/record/4531160/files/individual_votes.xlsx"
hss_s_mat <- openxlsx::read.xlsx(xlsxFile = path_to_xlsx, sheet = "pm_hsss")
hss_s <- hss_s_mat %>%
get_right_data_format(prefix_voter = "voter")
hss_h_mat <- openxlsx::read.xlsx(xlsxFile = path_to_xlsx, sheet = "pm_hssh")
hss_h <- hss_h_mat %>%
get_right_data_format(prefix_voter = "voter")
ls_b_mat <- openxlsx::read.xlsx(xlsxFile = path_to_xlsx, sheet = "pm_lsb")
ls_b <- ls_b_mat %>%
get_right_data_format(prefix_voter = "voter")
ls_m_mat <- openxlsx::read.xlsx(xlsxFile = path_to_xlsx, sheet = "pm_lsm")
ls_m <- ls_m_mat %>%
get_right_data_format(prefix_voter = "voter")
stem_mat <- openxlsx::read.xlsx(xlsxFile = path_to_xlsx, sheet = "pm_stem")
stem <- stem_mat %>%
get_right_data_format(prefix_voter = "voter")
mint_section1_mat <-
openxlsx::read.xlsx(xlsxFile = path_to_xlsx, sheet = "mint_section1")
mint_section1 <- mint_section1_mat %>%
get_right_data_format(prefix_voter = "voter") %>%
mutate(section = "one")
mint_section2_mat <-
openxlsx::read.xlsx(xlsxFile = path_to_xlsx, sheet = "mint_section2")
mint_section2 <- mint_section2_mat %>%
get_right_data_format(prefix_voter = "voter") %>%
mutate(section = "two")
mint_section3_mat <-
openxlsx::read.xlsx(xlsxFile = path_to_xlsx, sheet = "mint_section3")
mint_section3 <- mint_section3_mat %>%
get_right_data_format(prefix_voter = "voter") %>%
mutate(section = "three")
mint_section4_mat <-
openxlsx::read.xlsx(xlsxFile = path_to_xlsx, sheet = "mint_section4")
mint_section4 <- mint_section4_mat %>%
get_right_data_format(prefix_voter = "voter") %>%
mutate(section = "four")
# Note that we want to be able to differentiate the proposals in the
# different MINT sections:
mint_sections <- mint_section1 %>%
  mutate(proposal = paste0(proposal, "_1")) %>%
  bind_rows(mint_section2 %>%
              mutate(proposal = paste0(proposal, "_2"))) %>%
  bind_rows(mint_section3 %>%
              mutate(proposal = paste0(proposal, "_3"))) %>%
  bind_rows(mint_section4 %>%
              mutate(proposal = paste0(proposal, "_4")))
##### How many projects can still be funded? ####
how_many_fundable <- c(7, 4, 7, 8, 6)
names(how_many_fundable) <- c("hss_s", "hss_h", "ls_m", "ls_b", "stem")
The first case study focuses on the evaluation of the Postdoc.Mobility (PM) call of February 1st, 2020. Junior researchers who have recently defended their PhD and wish to pursue an academic career can apply for a PM fellowship, which allows them to work in a research group abroad for two years. The researchers submit their research proposal to the SNSF, where it is evaluated by two referees who score the proposal on a six-point scale (1 being poor and 6 being outstanding quality). Each proposal is evaluated by one of the following panels: Humanities; Social Sciences; Science, Technology, Engineering and Mathematics (STEM); Medicine; and Biology. As described in the main paper, the dataset on the PM panels contains the evaluations of the assessors on the proposals that were triaged into the ‘panel discussion’ group (i.e., neither directly accepted nor rejected). Each panel has its own available funding, so that five different funding lines are drawn. The proposals are discussed in the respective panel, where each member votes on a score for each proposal, unless they have a conflict of interest or another reason to be absent during the voting. Table 1.1 summarizes the number of proposals in the different ‘panel discussion’ groups together with the number of proposals that can still be funded and the size of each panel.
| Panel | N. of panel members | Total N. of submissions | N. of proposals discussed | N. of fundable proposals |
|---|---|---|---|---|
| Humanities | 14 | 23 | 11 | 4 |
| Social Sciences | 14 | 38 | 18 | 7 |
| Biology | 26 | 35 | 18 | 8 |
| Medicine | 17 | 35 | 14 | 7 |
| STEM | 29 | 50 | 18 | 6 |
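Most columns of Table 1.1 can be recomputed from the loaded data; below is a minimal sketch (the total number of submissions is not part of the Zenodo file, so that column is omitted):
# Sketch: panel size, number of discussed proposals and number of fundable
# proposals per panel, from the long-format data loaded above.
panel_data <- list(hss_h = hss_h, hss_s = hss_s, ls_b = ls_b,
                   ls_m = ls_m, stem = stem)
purrr::map_dfr(names(panel_data), function(p) {
  tibble(panel = p,
         n_members = n_distinct(panel_data[[p]]$voter),
         n_discussed = n_distinct(panel_data[[p]]$proposal),
         n_fundable = how_many_fundable[p])
})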
For each of the five panels, Figure 1.1 presents the votes of the panel members on each of the proposals discussed. The proposals are represented on the y-axis and ordered by the average of the individual votes they received (red dots). We see a large variability in the votes. Note that we only consider the middle block of proposals, which were neither truly exceptional nor of very low quality, and that the variability of the votes per proposal tends to be larger in this middle block than for proposals with a very low/high average score. Some members of the STEM panel still assessed a few proposals with a 2, the second-lowest grade; not surprisingly, the STEM panel has the lowest intraclass correlation (ICC), which represents the assessor reliability. In all five panels, the assessor reliability can be classified as either poor or fair.
leg <- cowplot::get_legend(get_grades_plot(hss_h))
gridExtra::grid.arrange(get_grades_plot(hss_h, hss_h_mat, x_min = 1.8,
title = "Humanities") +
theme(legend.position = "none"),
get_grades_plot(hss_s, hss_s_mat, x_min = 1.8,
title = "Social Sciences") +
theme(legend.position = "none"),
get_grades_plot(ls_b, ls_b_mat, x_min = 1.8,
title = "Biology")+
theme(legend.position = "none"),
get_grades_plot(ls_m, ls_m_mat, x_min = 1.8,
title = "Medicine")+
theme(legend.position = "none"),
get_grades_plot(stem, stem_mat, x_min = 1.8,
title = "STEM")+
theme(legend.position = "none"),
leg)
Figure 1.1: The individual votes given to all proposals by all panel members. The red dots represent the averages of those individual votes for each proposal. The intra-class correlation coefficients together with their 95% confidence intervals of the different panels can be found in the titles of the subfigures. In all panels, the assessor reliability can be classified as poor to fair.
Then, Bayesian hierarchical models will be fitted for each of the five panels individually, and the expected ranks will be retrieved using the MCMC samples from these models.
##### The model for JAGS ####
# We use the package's default JAGS model.
##### Get the ER results: ####
# To make the code run faster, decrease these numbers:
n.iter <- 120 * 10^{3}
n.burnin <- 20 * 10^{3}
n.adapt <- 20 * 10^{3}
n.chains <- 2
seed <- 1991
# As the number of iterations etc. needed for convergence was checked
# beforehand, setting the argument max_iter equal to n.iter ensures that the
# algorithm is run exactly once instead of being rerun until convergence.
mcmc_hss_s_object <-
get_mcmc_samples(data = hss_s,
id_application = "proposal",
id_voter = "voter",
grade_variable = "num_grade",
n_chains = n.chains, n_iter = n.iter,
n_burnin = n.burnin, n_adapt = n.adapt,
max_iter = 120 * 10^{3},
other_variables = "nu",
inits_type = "overdispersed",
seed = seed, quiet = TRUE, dont_bind = TRUE)
mcmc_hss_s <- mcmc(do.call(rbind, mcmc_hss_s_object))
mcmc_hss_h_object <-
get_mcmc_samples(data = hss_h,
id_application = "proposal",
id_voter = "voter",
grade_variable = "num_grade",
n_chains = n.chains, n_iter = n.iter,
n_burnin = n.burnin, n_adapt = n.adapt,
max_iter = 120 * 10^{3},
other_variables = "nu",
inits_type = "overdispersed",
seed = seed, quiet = TRUE, dont_bind = TRUE)
mcmc_hss_h <- mcmc(do.call(rbind, mcmc_hss_h_object))
mcmc_ls_b_object <-
get_mcmc_samples(data = ls_b,
id_application = "proposal",
id_voter = "voter",
grade_variable = "num_grade",
n_chains = n.chains, n_iter = n.iter,
n_burnin = n.burnin, n_adapt = n.adapt,
max_iter = 120 * 10^{3},
other_variables = "nu",
inits_type = "overdispersed",
seed = seed, quiet = TRUE, dont_bind = TRUE)
mcmc_ls_b <- mcmc(do.call(rbind, mcmc_ls_b_object))
mcmc_ls_m_object <-
get_mcmc_samples(data = ls_m,
id_application = "proposal",
id_voter = "voter",
grade_variable = "num_grade",
n_chains = n.chains, n_iter = n.iter,
n_burnin = n.burnin, n_adapt = n.adapt,
max_iter = 120 * 10^{3},
other_variables = "nu",
inits_type = "overdispersed",
seed = seed, quiet = TRUE, dont_bind = TRUE)
mcmc_ls_m <- mcmc(do.call(rbind, mcmc_ls_m_object))
mcmc_stem_object <-
get_mcmc_samples(data = stem,
id_application = "proposal",
id_voter = "voter",
grade_variable = "num_grade",
n_chains = n.chains, n_iter = n.iter,
n_burnin = n.burnin, n_adapt = n.adapt,
max_iter = 120 * 10^{3},
other_variables = "nu",
inits_type = "overdispersed",
seed = seed, quiet = TRUE, dont_bind = TRUE)
mcmc_stem <- mcmc(do.call(rbind, mcmc_stem_object))
# Computation of the ER and PCER:
ranks_hss_s <- get_er_from_jags(data = hss_s,
id_application = "proposal",
id_voter = "voter",
grade_variable = "num_grade",
mcmc_samples = mcmc_hss_s_object)
ranks_hss_h <- get_er_from_jags(data = hss_h,
id_application = "proposal",
id_voter = "voter",
grade_variable = "num_grade",
mcmc_samples = mcmc_hss_h_object)
ranks_ls_b <- get_er_from_jags(data = ls_b,
id_application = "proposal",
id_voter = "voter",
grade_variable = "num_grade",
mcmc_samples = mcmc_ls_b_object)
ranks_ls_m <- get_er_from_jags(data = ls_m,
id_application = "proposal",
id_voter = "voter",
grade_variable = "num_grade",
mcmc_samples = mcmc_ls_m_object)
ranks_stem <- get_er_from_jags(data = stem,
id_application = "proposal",
id_voter = "voter",
grade_variable = "num_grade",
mcmc_samples = mcmc_stem_object)
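The ranking objects returned by get_er_from_jags() can also be inspected directly; a minimal sketch (the rankings element with its id_application and er columns is the one used by the plotting functions below):
# Sketch: the five best-ranked Social Sciences proposals according to the ER.
ranks_hss_s$rankings %>%
  arrange(er) %>%
  head(5)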
Figure 1.2 presents the different stages of ranking of the proposals for all panels together with the rankability. The first column of points in the figure represents the ranking based on the simple averages (Fixed). Then the proposals are ranked based on the posterior means of θi (Posterior Mean) and, finally, the last column of points represents the expected rank. The rankability (in the titles of the plots, next to the ICC, and introduced in van Houwelingen et al.) differs from panel to panel. A naive funding line, represented by the color code in the figure, is defined by simply funding the x best-ranked proposals, where x can be retrieved from Table 1.1. Lower rankability suggests that more proposals are clustered together and therefore cannot be clearly differentiated from their competitors. However, rankability is not an indication of how easily funded and not-funded proposals can be discriminated (see the Humanities panel, with a relatively low rankability but two clearly differentiated groups).
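As a reminder, the rankability contrasts the heterogeneity between the proposals with the uncertainty of the individual proposal effects. A common definition, following van Houwelingen et al. (the exact computation inside {ERforResearch} may differ in detail), is

$$\rho = \frac{\tau^2}{\tau^2 + \operatorname{median}_i\, \widehat{\operatorname{se}}^2(\hat{\theta}_i)},$$

where τ² is the between-proposal variance and se(θ̂i) the standard error of the estimated proposal effect; a ρ close to 1 means that the proposal effects dominate the sampling noise, so the proposals can be ranked reliably.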
##### Plot the results ####
plot_hss_s <-
plotting_er_results(ranks_hss_s, result_show = TRUE, title = "Social Science",
id_application = "id_application",
pt_size = .5, line_size = .2, draw_funding_line = FALSE,
line_type_fl = "longdash", color_fl = "darkgray", grep_size = 3,
how_many_fundable = how_many_fundable["hss_s"])
plot_hss_h <-
plotting_er_results(ranks_hss_h, result_show = TRUE, title = "Humanities",
id_application = "id_application",
pt_size = .5, line_size = .2, draw_funding_line = FALSE,
line_type_fl = "longdash", color_fl = "darkgray", grep_size = 3,
how_many_fundable = how_many_fundable["hss_h"])
plot_ls_b <-
plotting_er_results(ranks_ls_b, result_show = TRUE, title = "Biology",
id_application = "id_application",
pt_size = .5, line_size = .2, draw_funding_line = FALSE,
line_type_fl = "longdash", color_fl = "darkgray", grep_size = 3,
how_many_fundable = how_many_fundable["ls_b"])
plot_ls_m <-
plotting_er_results(ranks_ls_m, result_show = TRUE, title = "Medicine",
id_application = "id_application",
pt_size = .5, line_size = .2, draw_funding_line = FALSE,
line_type_fl = "longdash", color_fl = "darkgray", grep_size = 3,
how_many_fundable = how_many_fundable["ls_m"])
plot_stem <-
plotting_er_results(ranks_stem, result_show = TRUE, title = "STEM",
id_application = "id_application",
pt_size = .5, line_size = .2, draw_funding_line = FALSE,
line_type_fl = "longdash", color_fl = "darkgray", grep_size = 3,
how_many_fundable = how_many_fundable["stem"])
leg <- cowplot::get_legend(plot_hss_h)
gridExtra::grid.arrange(plot_hss_h +
theme(legend.position = "none",
axis.title.x = element_blank(),
axis.text.x = element_blank(),
axis.title.y = element_text(size = 7),
title = element_text(size = 7)),
plot_hss_s +
theme(legend.position = "none",
axis.title.y = element_blank(),
axis.title.x = element_blank(),
axis.text.x = element_blank(),
title = element_text(size = 7)),
plot_ls_b +
theme(legend.position = "none",
axis.title.x = element_blank(),
axis.title.y = element_blank(),
title = element_text(size = 7)),
plot_ls_m +
theme(legend.position = "none",
axis.title.x = element_blank(),
axis.title.y = element_text(size = 7),
title = element_text(size = 7)),
plot_stem +
theme(legend.position = "none",
axis.title.y = element_blank(),
title = element_text(size = 7)),
leg,
ncol = 3)
Figure 1.2: PM fellowship proposals discussed in the different panels, ranked using the average overall score (Fixed), the posterior means of the parameter θi and the ER. The color code shows which proposals would be funded if the SNSF simply funded the x best proposals based on the ER, where x is panel-specific and shown in Table 1.1. Points representing tied proposals lie on top of each other, but are noticeable as such because of the numbers.
To fully account for the uncertainty, we plot the same ERs of the proposals together with their 50% credible intervals in Figure 1.3. The methodology then recommends the following decisions:
plot_er_dist_humanities <-
plot_er_distributions(get_mcmc_samples_result = mcmc_hss_h_object,
n_proposals = hss_h %>%
summarise(n_distinct(proposal)) %>% pull(),
name_er = "rank_theta", title = "Humanities",
number_fundable = how_many_fundable["hss_h"],
outer_show = FALSE, proposal = "")
plot_er_dist_social_sciences <-
plot_er_distributions(get_mcmc_samples_result = mcmc_hss_s_object,
n_proposals = hss_s %>%
summarise(n_distinct(proposal)) %>% pull(),
name_er = "rank_theta", title = "Social Sciences",
number_fundable = how_many_fundable["hss_s"],
outer_show = FALSE, proposal = "")
plot_er_dist_biology <-
plot_er_distributions(get_mcmc_samples_result = mcmc_ls_b_object,
n_proposals = ls_b %>%
summarise(n_distinct(proposal)) %>% pull(),
name_er = "rank_theta", title = "Biology",
number_fundable = how_many_fundable["ls_b"],
outer_show = FALSE, proposal = "")
plot_er_dist_medicine <-
plot_er_distributions(get_mcmc_samples_result = mcmc_ls_m_object,
n_proposals = ls_m %>%
summarise(n_distinct(proposal)) %>% pull(),
name_er = "rank_theta", title = "Medicine",
number_fundable = how_many_fundable["ls_m"],
outer_show = FALSE, proposal = "")
plot_er_dist_stem <-
plot_er_distributions(get_mcmc_samples_result = mcmc_stem_object,
n_proposals = stem %>%
summarise(n_distinct(proposal)) %>% pull(),
name_er = "rank_theta", title = "STEM",
number_fundable = how_many_fundable["stem"],
outer_show = FALSE, proposal = "")
leg <- cowplot::get_legend(plot_er_dist_stem)
gridExtra::grid.arrange(plot_er_dist_humanities +
theme(legend.position = "none",
title = element_text(size = 7)),
plot_er_dist_social_sciences +
theme(legend.position = "none",
title = element_text(size = 7)),
plot_er_dist_medicine +
theme(legend.position = "none",
title = element_text(size = 7)),
plot_er_dist_biology +
theme(legend.position = "none",
title = element_text(size = 7)),
plot_er_dist_stem +
theme(legend.position = "none",
title = element_text(size = 7)), leg, ncol = 3)
Figure 1.3: The expected rank together with 50% credible intervals of the rank.
Finally, Figure 1.4 shows the posterior distributions of the νj’s (the means of the assessor effects) for all PM panels, estimated from the discussed proposals. If νj is on average negative for a specific assessor, this means that assessor j is stricter than the other assessors in the panel, and his/her stricter grades are corrected more. On the other hand, a νj that is on average positive reflects the behaviour of an assessor who generally gives higher grades than the remaining assessors. In the STEM panel, we can observe an extreme assessor (assessor 18). Looking at the raw data, this makes sense, as assessor 18 gives especially low grades compared to the other assessors (Cs to proposals to which others gave As and ABs).
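To identify such extreme assessors numerically, the posterior means of the νj’s can be extracted from the pooled MCMC samples; a minimal sketch, assuming the usual JAGS naming nu[1], …, nu[29] for the 29 STEM panel members:
# Sketch: posterior means of the assessor effects nu_j in the STEM panel.
colMeans(mcmc_stem[, paste0("nu[", 1:29, "]")])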
plot_voter_humanities <-
voter_behavior_distribution(get_mcmc_samples_result = mcmc_hss_h_object,
title = "Humanities",
names_voters = "assessor",
n_voters = hss_h %>%
summarise(n_distinct(voter)) %>% pull(),
xlim_min = -1, xlim_max = 1,
scale = 1)
plot_voter_social_sciences <-
voter_behavior_distribution(get_mcmc_samples_result = mcmc_hss_s_object,
title = "Social Sciences",
names_voters = "assessor",
n_voters = hss_s %>%
summarise(n_distinct(voter)) %>% pull(),
xlim_min = -1, xlim_max = 1,
scale = 1)
plot_voter_biology <-
voter_behavior_distribution(get_mcmc_samples_result = mcmc_ls_b_object,
title = "Biology",
names_voters = "assessor",
n_voters = ls_b %>%
summarise(n_distinct(voter)) %>% pull(),
xlim_min = -1, xlim_max = 1,
scale = 1)
plot_voter_medicine <-
voter_behavior_distribution(get_mcmc_samples_result = mcmc_ls_m_object,
title = "Medicine",
names_voters = "assessor",
n_voters = ls_m %>%
summarise(n_distinct(voter)) %>% pull(),
xlim_min = -1, xlim_max = 1,
scale = 1)
plot_voter_stem <-
voter_behavior_distribution(get_mcmc_samples_result = mcmc_stem_object,
title = "STEM",
names_voters = "assessor",
n_voters = stem %>%
summarise(n_distinct(voter)) %>% pull(),
xlim_min = -1.2, xlim_max = 1,
scale = 1)
gridExtra::grid.arrange(plot_voter_humanities, plot_voter_social_sciences,
plot_voter_medicine, plot_voter_biology,
plot_voter_stem, ncol = 5)
Figure 1.4: Posterior distributions of the assessor-specific means νj for the five PM panels. Each panel member voted on all proposals discussed in their panel, unless they had a conflict of interest or another reason to be absent during the vote.
Instead of defining the random selection group using the ERs, we can directly use the proposal effects (the θi’s). Figure 1.5 shows the θi’s together with their 50% credible intervals; the higher the θi, the better the quality of the proposal. The division into at most three groups (accepted, rejected and random selection) is the same using the proposal effects as with the expected rank.
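The code producing Figure 1.5 is not shown in this supplement. A similar display can be sketched with the bayesplot package, since the θi’s correspond to the application_intercept parameters of the JAGS model (see the convergence section below); here for the 11 proposals discussed in the Humanities panel:
# Sketch: inner 50% credible intervals of the proposal effects theta_i.
bayesplot::mcmc_intervals(mcmc_hss_h_object$samples,
                          pars = paste0("application_intercept[", 1:11, "]"),
                          prob = 0.5)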
Figure 1.5: The means of the posterior distributions of the θi’s together with their 50% credible intervals for the PM fellowship proposals. The dashed blue line is the naive funding line, i.e., the posterior mean of the distribution of the θi of the last fundable proposal.
As discussed in the main paper, other quantities derived from the ER can be of interest. The ordered PCER of the proposals in the different panels can be found in Figure 1.6. Note that the lower the PCER, the better the quality of the proposal compared to all competing proposals. The lowest PCER in the Medicine panel is, for example, 5.6%, meaning that the probability that the best-ranked proposal is of worse quality than a randomly selected proposal (including itself) is only 5.6%.
Figure 1.6: The PCER of the ranked proposals for all the PM panels.
Another related quantity discussed in the main paper is the SUCRA, the surface under the cumulative ranking probability (CRP) plot, which can be computed directly from the MCMC samples of the model parameters. Figure 1.7 gives an example of this plot for the Humanities panel; every proposal has its own CRP plot, and the SUCRA of each proposal is written inside its plot. Like the PCER, the SUCRA is independent of the number of proposals ranked.
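For reference, with a competing proposals, the SUCRA of proposal i is the normalized area under its CRP curve,

$$\mathrm{SUCRA}_i = \frac{1}{a-1}\sum_{k=1}^{a-1} \mathrm{cum}_{ik}, \qquad \mathrm{cum}_{ik} = \Pr(\mathrm{rank}_i \le k),$$

so that a value of 1 means the proposal is certainly the best, and a value of 0 that it is certainly the worst.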
plot_rankogram(data = hss_h,
id_application = "proposal",
id_voter = "voter",
grade_variable = "num_grade",
rank_theta_name = "rank_theta",
cumulative_rank_prob = TRUE,
mcmc_samples = mcmc_hss_h_object)
Figure 1.7: Cumulative ranking probability plots for the competing proposals in the Humanities Panel.
Figure 1.8 shows the ER of the proposals together with their 50% credible intervals when using a Bayesian hierarchical model for an ordinal outcome variable.
# Ordinal version of the JAGS model:
cat("model{
# Likelihood:
for (i in 1:n) { # i is not the application but the review
grade[i] ~ dinterval(latent_trait[i], c[])
latent_trait[i] ~ dnorm(mu[i], inv_sigma2)
mu[i] <- overall_mean + application_intercept[num_application[i]] +
voter_intercept[num_application[i], num_voter[i]]
}
# Ranks:
rank_theta[1:n_application] <- rank(-application_intercept[])
# Priors:
for (j in 1:n_application){
application_intercept[j] ~ dnorm(0, inv_tau_application2)
}
for (l in 1:n_voters){
for(j in 1:n_application){
voter_intercept[j, l] ~ dnorm(nu[l], inv_tau_voter2)
}
}
for (l in 1:n_voters){
nu[l] ~ dnorm(0, 4) # 1/(0.5^2) = 4
}
for (k in 1:5){
cc[k] ~ dunif(-1000, 1000)
}
c[1:5] <- sort(cc)
sigma ~ dunif(0.000001, 2)
inv_sigma2 <- pow(sigma, -2)
inv_tau_application2 <- pow(tau_application, -2)
tau_application ~ dunif(0.000001, 2)
inv_tau_voter2 <- pow(tau_voter, -2)
tau_voter ~ dunif(0.000001, 2)
}",
file = here("jags_model_ordinal_interval.txt"))
seed_ordinal <- seed + 1
mcmc_hss_s_ordinal <-
get_mcmc_samples(data = hss_s,
id_application = "proposal",
id_voter = "voter",
grade_variable = "num_grade",
n_chains = n.chains, n_iter = n.iter,
n_burnin = n.burnin, n_adapt = n.adapt,
max_iter = 120 * 10^{3},
other_variables = "nu",
ordinal_scale = TRUE, point_scale = 6,
inits_type = "overdispersed",
path_to_jags_model =
here("jags_model_ordinal_interval.txt"),
seed = seed_ordinal, quiet = TRUE)
mcmc_hss_h_ordinal <-
get_mcmc_samples(data = hss_h,
id_application = "proposal",
id_voter = "voter",
grade_variable = "num_grade",
n_chains = n.chains, n_iter = n.iter,
n_burnin = n.burnin, n_adapt = n.adapt,
max_iter = 120 * 10^{3},
other_variables = "nu",
ordinal_scale = TRUE, point_scale = 6,
inits_type = "overdispersed",
path_to_jags_model =
here("jags_model_ordinal_interval.txt"),
seed = seed_ordinal, quiet = TRUE)
mcmc_ls_b_ordinal <-
get_mcmc_samples(data = ls_b,
id_application = "proposal",
id_voter = "voter",
grade_variable = "num_grade",
n_chains = n.chains, n_iter = n.iter,
n_burnin = n.burnin, n_adapt = n.adapt,
max_iter = 120 * 10^{3},
other_variables = "nu",
ordinal_scale = TRUE, point_scale = 6,
inits_type = "overdispersed",
path_to_jags_model =
here("jags_model_ordinal_interval.txt"),
seed = seed_ordinal, quiet = TRUE)
mcmc_ls_m_ordinal <-
get_mcmc_samples(data = ls_m,
id_application = "proposal",
id_voter = "voter",
grade_variable = "num_grade",
n_chains = n.chains, n_iter = n.iter,
n_burnin = n.burnin, n_adapt = n.adapt,
max_iter = 120 * 10^{3},
other_variables = "nu",
ordinal_scale = TRUE, point_scale = 6,
inits_type = "overdispersed",
path_to_jags_model =
here("jags_model_ordinal_interval.txt"),
seed = seed_ordinal, quiet = TRUE)
mcmc_stem_ordinal <-
get_mcmc_samples(data = stem,
id_application = "proposal",
id_voter = "voter",
grade_variable = "num_grade",
n_chains = n.chains, n_iter = n.iter,
n_burnin = n.burnin, n_adapt = n.adapt,
max_iter = 120 * 10^{3},
other_variables = "nu",
ordinal_scale = TRUE, point_scale = 6,
inits_type = "overdispersed",
path_to_jags_model =
here("jags_model_ordinal_interval.txt"),
seed = seed_ordinal, quiet = TRUE)
plot_er_dist_humanities_ordinal <-
plot_er_distributions(get_mcmc_samples_result = mcmc_hss_h_ordinal,
n_proposals = hss_h %>%
summarise(n_distinct(proposal)) %>% pull(),
name_er_or_theta = "rank_theta", title = "Humanities",
number_fundable = how_many_fundable["hss_h"],
outer_show = FALSE, proposal = "")
plot_er_dist_social_sciences_ordinal <-
plot_er_distributions(get_mcmc_samples_result = mcmc_hss_s_ordinal,
n_proposals = hss_s %>%
summarise(n_distinct(proposal)) %>% pull(),
name_er_or_theta = "rank_theta", title = "Social Sciences",
number_fundable = how_many_fundable["hss_s"],
outer_show = FALSE, proposal = "")
plot_er_dist_biology_ordinal <-
plot_er_distributions(get_mcmc_samples_result = mcmc_ls_b_ordinal,
n_proposals = ls_b %>%
summarise(n_distinct(proposal)) %>% pull(),
name_er_or_theta = "rank_theta", title = "Biology",
number_fundable = how_many_fundable["ls_b"],
outer_show = FALSE, proposal = "")
plot_er_dist_medicine_ordinal <-
plot_er_distributions(get_mcmc_samples_result = mcmc_ls_m_ordinal,
n_proposals = ls_m %>%
summarise(n_distinct(proposal)) %>% pull(),
name_er_or_theta = "rank_theta", title = "Medicine",
number_fundable = how_many_fundable["ls_m"],
outer_show = FALSE, proposal = "")
plot_er_dist_stem_ordinal <-
plot_er_distributions(get_mcmc_samples_result = mcmc_stem_ordinal,
n_proposals = stem %>%
summarise(n_distinct(proposal)) %>% pull(),
name_er_or_theta = "rank_theta", title = "STEM",
number_fundable = how_many_fundable["stem"],
outer_show = FALSE, proposal = "")
leg <- cowplot::get_legend(plot_er_dist_stem_ordinal)
gridExtra::grid.arrange(plot_er_dist_humanities_ordinal +
theme(legend.position = "none"),
plot_er_dist_social_sciences_ordinal +
theme(legend.position = "none"),
plot_er_dist_medicine_ordinal +
theme(legend.position = "none"),
plot_er_dist_biology_ordinal +
theme(legend.position = "none"),
plot_er_dist_stem_ordinal +
theme(legend.position = "none"), leg, ncol = 2)
Figure 1.8: The expected rank together with 50% credible intervals of the rank using an ordinal outcome model.
rm(mcmc_hss_h_ordinal,
mcmc_stem_ordinal,
mcmc_hss_s_ordinal,
mcmc_ls_b_ordinal,
mcmc_ls_m_ordinal,
mcmc_hss_s_object,
mcmc_hss_h_object,
mcmc_stem_object,
mcmc_hss_h,
mcmc_hss_s,
mcmc_stem,
mcmc_ls_b,
mcmc_ls_m)
(Note that, to make this online supplement run faster, we decreased the number of iterations, adaptation and burn-in steps for the following example compared to the manuscript.)
Project Funding (PF) is the SNSF’s most important funding instrument. PF grants support research proposals on a topic of the applicant’s own choice. The data analyzed in the following originates from the evaluation of the April 2020 call in the Mathematics, Natural and Engineering Sciences (MINT) division, in which 353 grant proposals were evaluated. To keep the workload of the panel members manageable and to safeguard the quality of the reviews and grades, the evaluation was split into four separate sections. The sections were defined so that they were of similar size and had a similar number of international members and female members, which makes their compositions comparable. Each section defined its own funding line, while keeping the success rate in each section the same (∼ 30%). Table 2.1 shows the number of proposals evaluated and fundable by section (each section has nine members). The number of fundable proposals is defined as ∼ 30% of the number of evaluated proposals. Each section member, or assessor, individually evaluates each proposal in their section (unless they have a conflict of interest or another reason to be absent during the vote). The average evaluation scores (6 = A, …, 1 = D) given to all proposals in each section and the averages of the fundable proposals (the 30% best ranked according to their average score) are also shown.
| Section | N. of proposals | N. of fundable proposals | Avg score | Avg score of 30% best |
|---|---|---|---|---|
| one | 87 | 26 | 3.84 | 5.01 |
| two | 92 | 28 | 3.81 | 5.02 |
| three | 86 | 26 | 3.97 | 5.24 |
| four | 88 | 26 | 3.73 | 5.04 |
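The two score columns of Table 2.1 can be recomputed from the pooled data; a minimal sketch, assuming ‘Avg score’ is the mean of the per-proposal average scores within a section:
# Sketch: per-section average scores and the average score of the 30% best
# proposals (ranked by their average score).
prop_avg <- mint_sections %>%
  group_by(section, proposal) %>%
  summarise(avg = mean(num_grade, na.rm = TRUE), .groups = "drop")
prop_avg %>%
  group_by(section) %>%
  arrange(desc(avg), .by_group = TRUE) %>%
  summarise(avg_score = mean(avg),
            avg_score_30pct_best = mean(avg[seq_len(round(.3 * n()))]))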
To understand how reliable the assessors’ evaluations were in the different sections, we refer to Figure 2.1. Compared to the previous Postdoc.Mobility case study, the intra-class correlation coefficients are higher and can be classified as good. One reason for the higher reliability is that all project funding proposals are included here, including those with low/high average scores, for which the votes are usually less variable. (In contrast, for PM we only looked at proposals with average scores in the middle range.)
leg <- cowplot::get_legend(get_grades_plot(mint_section1))
gridExtra::grid.arrange(get_grades_plot(mint_section1, mint_section1_mat,
x_min = 1.8, title = "Section 1",
jitter_alpha = .4, jitter_w = .05) +
theme(legend.position = "none"),
get_grades_plot(mint_section2, mint_section2_mat,
x_min = 1.8, title = "Section 2",
jitter_alpha = .4, jitter_w = .05) +
theme(legend.position = "none"),
get_grades_plot(mint_section3, mint_section3_mat,
x_min = 1.8, title = "Section 3",
jitter_alpha = .4, jitter_w = .05)+
theme(legend.position = "none"),
get_grades_plot(mint_section4, mint_section4_mat,
x_min = 1.8, title = "Section 4",
jitter_alpha = .4, jitter_w = .05) +
theme(legend.position = "none")#, leg
)
Figure 2.1: The individual votes given to all proposals by all panel members. The red dots represent the averages of those individual votes for each proposal. The intra-class correlation coefficients (ICC) together with their 95% confidence intervals of the different panels can be found in the titles of the subfigures. The overall reliability in these panels can be classified as good.
Then, as for the Postdoc.Mobility fellowships, Bayesian hierarchical models will be fitted for each of the four sections individually, as well as for the pooled data, with an additional section effect added to the model definition. Afterwards, the computation of the expected ranks (and the PCER) is straightforward using the MCMC samples from these models.
#### JAGS model for continuous outcome ####
cat("model{
# Likelihood:
for (i in 1:n) { # i is not the application but the review
grade[i] ~ dnorm(mu[i], inv_sigma2)
# inv_sigma2 is precision (1 / variance)
mu[i] <- overall_mean + application_intercept[num_application[i]] +
voter_intercept[num_application[i], num_voter[i]] +
section_intercept[num_section[i]]
}
# Ranks:
rank_theta[1:n_application] <- rank(-application_intercept[])
# Priors:
for (j in 1:n_application){
application_intercept[j] ~ dnorm(0, inv_tau_application2)
}
for (l in 1:n_voters){
for(j in 1:n_application){
voter_intercept[j, l] ~ dnorm(nu[l], inv_tau_voter2)
}
}
for (l in 1:n_voters){
nu[l] ~ dnorm(0, 4) # 1/(0.5^2) = 4
}
for (l in 1:n_section){
section_intercept[l] ~ dnorm(0, inv_tau_section2)
}
sigma ~ dunif(0.000001, 2)
inv_sigma2 <- pow(sigma, -2)
inv_tau_application2 <- pow(tau_application, -2)
tau_application ~ dunif(0.000001, 2)
inv_tau_voter2 <- pow(tau_voter, -2)
tau_voter ~ dunif(0.000001, 2)
inv_tau_section2 <- pow(tau_section, -2)
tau_section ~ dunif(0.000001, 2)
}",
file = here("revision", "manuscript", "jags_model_with_sections.txt"))
n.iter <- 150 * 10^{3} #500 * 10^{3}
n.burnin <- 50 * 10^{3} # 100 * 10^{3}
n.adapt <- 50 * 10^{3} # 100 * 10^{3}
n.chains <- 2
seed <- 1991
mcmc_mint <-
ERforResearch:::get_mcmc_samples(data = mint_sections,
id_application = "proposal",
id_voter = "voter",
id_section = "section",
grade_variable = "num_grade",
tau_section_name = "tau_section",
n_chains = n.chains, n_iter = n.iter,
n_burnin = n.burnin, n_adapt = n.adapt,
max_iter = 150 * 10^{3}, # 500 * 10^{3},
inits_type = "overdispersed",
path_to_jags_model =
here("revision", "manuscript",
"jags_model_with_sections.txt"),
seed = seed, quiet = TRUE)
# Section by section:
n.iter <- 120 * 10^{3}
n.burnin <- 30 * 10^{3}
n.adapt <- 30 * 10^{3}
mcmc_mint_section1 <-
ERforResearch:::get_mcmc_samples(data = mint_section1,
id_application = "proposal",
id_voter = "voter",
grade_variable = "num_grade",
n_chains = n.chains, n_iter = n.iter,
n_burnin = n.burnin, n_adapt = n.adapt,
max_iter = 120 * 10^{3},
inits_type = "overdispersed",
seed = seed, quiet = TRUE)
mcmc_mint_section2 <-
ERforResearch:::get_mcmc_samples(data = mint_section2,
id_application = "proposal",
id_voter = "voter",
grade_variable = "num_grade",
n_chains = n.chains, n_iter = n.iter,
n_burnin = n.burnin, n_adapt = n.adapt,
max_iter = 120 * 10^{3},
inits_type = "overdispersed",
seed = seed, quiet = TRUE)
mcmc_mint_section3 <-
ERforResearch:::get_mcmc_samples(data = mint_section3,
id_application = "proposal",
id_voter = "voter",
grade_variable = "num_grade",
n_chains = n.chains, n_iter = n.iter,
n_burnin = n.burnin, n_adapt = n.adapt,
max_iter = 120 * 10^{3},
inits_type = "overdispersed",
seed = seed, quiet = TRUE)
mcmc_mint_section4 <-
ERforResearch:::get_mcmc_samples(data = mint_section4,
id_application = "proposal",
id_voter = "voter",
grade_variable = "num_grade",
n_chains = n.chains, n_iter = n.iter,
n_burnin = n.burnin, n_adapt = n.adapt,
max_iter = 120 * 10^{3},
inits_type = "overdispersed",
seed = seed, quiet = TRUE)
ranks_mint <-
get_er_from_jags(data = mint_sections,
id_application = "proposal",
id_voter = "voter",
id_section = "section",
grade_variable = "num_grade",
tau_section_name = "tau_section",
mcmc_samples = mcmc_mint,
seed = seed)
# Section by section:
ranks_mint_section1 <-
get_er_from_jags(data = mint_section1,
id_application = "proposal",
id_voter = "voter",
grade_variable = "num_grade",
mcmc_samples = mcmc_mint_section1,
seed = seed)
ranks_mint_section2 <-
get_er_from_jags(data = mint_section2,
id_application = "proposal",
id_voter = "voter",
grade_variable = "num_grade",
mcmc_samples = mcmc_mint_section2,
seed = seed)
ranks_mint_section3 <-
get_er_from_jags(data = mint_section3,
id_application = "proposal",
id_voter = "voter",
grade_variable = "num_grade",
mcmc_samples = mcmc_mint_section3,
seed = seed)
ranks_mint_section4 <-
get_er_from_jags(data = mint_section4,
id_application = "proposal",
id_voter = "voter",
grade_variable = "num_grade",
mcmc_samples = mcmc_mint_section4,
seed = seed)
#### JAGS model for ordinal outcome ####
cat("model{
# Likelihood:
for (i in 1:n) { # i is not the application but the review
grade[i] ~ dinterval(latent_trait[i], c[])
latent_trait[i] ~ dnorm(mu[i], inv_sigma2)
mu[i] <- overall_mean + application_intercept[num_application[i]] +
voter_intercept[num_application[i], num_voter[i]] +
section_intercept[num_section[i]]
}
# Ranks:
rank_theta[1:n_application] <- rank(-application_intercept[])
# Priors:
for (j in 1:n_application){
application_intercept[j] ~ dnorm(0, inv_tau_application2)
}
for (l in 1:n_voters){
for(j in 1:n_application){
voter_intercept[j, l] ~ dnorm(nu[l], inv_tau_voter2)
}
}
for (l in 1:n_voters){
nu[l] ~ dnorm(0, 4) # 1/(0.5^2) = 4
}
for (l in 1:n_section){
section_intercept[l] ~ dnorm(0, inv_tau_section2)
}
for (k in 1:5){
cc[k] ~ dunif(-1000, 1000)
}
c[1:5] <- sort(cc)
sigma ~ dunif(0.000001, 2)
inv_sigma2 <- pow(sigma, -2)
inv_tau_application2 <- pow(tau_application, -2)
tau_application ~ dunif(0.000001, 2)
inv_tau_voter2 <- pow(tau_voter, -2)
tau_voter ~ dunif(0.000001, 2)
inv_tau_section2 <- pow(tau_section, -2)
tau_section ~ dunif(0.000001, 2)
}",
file = here("revision", "manuscript",
"jags_model_with_sections_ordinal.txt"))
n.iter <- 150 * 10^{3} # 500 * 10^{3}
n.burnin <- 50 * 10^{3} # 100 * 10^{3}
n.adapt <- 50 * 10^{3} # 100 * 10^{3}
n.chains <- 2
seed_ordinal <- seed + 1
mcmc_mint_ordinal <-
get_mcmc_samples(data = mint_sections,
id_application = "proposal",
id_voter = "voter",
id_section = "section",
grade_variable = "num_grade",
tau_section_name = "tau_section",
n_chains = n.chains, n_iter = n.iter,
n_burnin = n.burnin, n_adapt = n.adapt,
max_iter = 150 * 10^{3}, # 500 * 10^{3},
inits_type = "overdispersed",
ordinal_scale = TRUE,
point_scale = 6,
path_to_jags_model =
here("revision", "manuscript",
"jags_model_with_sections_ordinal.txt"),
seed = seed_ordinal, quiet = TRUE)
# Section by section:
n.iter <- 120 * 10^{3}
n.burnin <- 30 * 10^{3}
n.adapt <- 30 * 10^{3}
mcmc_mint_section1_ordinal <-
get_mcmc_samples(data = mint_section1,
id_application = "proposal",
id_voter = "voter",
grade_variable = "num_grade",
n_chains = n.chains, n_iter = n.iter,
n_burnin = n.burnin, n_adapt = n.adapt,
max_iter = 120 * 10^{3},
inits_type = "overdispersed",
ordinal_scale = TRUE,
point_scale = 6,
path_to_jags_model =
here("revision", "manuscript",
"jags_model_ordinal_interval.txt"),
seed = seed_ordinal, quiet = TRUE)
mcmc_mint_section2_ordinal <-
get_mcmc_samples(data = mint_section2,
id_application = "proposal",
id_voter = "voter",
grade_variable = "num_grade",
n_chains = n.chains, n_iter = n.iter,
n_burnin = n.burnin, n_adapt = n.adapt,
max_iter = 120 * 10^{3},
inits_type = "overdispersed",
ordinal_scale = TRUE,
point_scale = 6,
path_to_jags_model =
here("revision", "manuscript",
"jags_model_ordinal_interval.txt"),
seed = seed_ordinal, quiet = TRUE)
mcmc_mint_section3_ordinal <-
get_mcmc_samples(data = mint_section3,
id_application = "proposal",
id_voter = "voter",
grade_variable = "num_grade",
n_chains = n.chains, n_iter = n.iter,
n_burnin = n.burnin, n_adapt = n.adapt,
max_iter = 120 * 10^{3},
inits_type = "overdispersed",
ordinal_scale = TRUE,
point_scale = 6,
path_to_jags_model =
here("revision", "manuscript",
"jags_model_ordinal_interval.txt"),
seed = seed_ordinal, quiet = TRUE)
mcmc_mint_section4_ordinal <-
get_mcmc_samples(data = mint_section4,
id_application = "proposal",
id_voter = "voter",
grade_variable = "num_grade",
n_chains = n.chains, n_iter = n.iter,
n_burnin = n.burnin, n_adapt = n.adapt,
max_iter = 120 * 10^{3},
inits_type = "overdispersed",
ordinal_scale = TRUE,
point_scale = 6,
path_to_jags_model =
here("revision", "manuscript",
"jags_model_ordinal_interval.txt"),
seed = seed_ordinal, quiet = TRUE)
ranks_mint_ordinal <-
get_er_from_jags(data = mint_sections,
id_application = "proposal",
id_voter = "voter",
id_section = "section",
grade_variable = "num_grade",
tau_section_name = "tau_section",
mcmc_samples = mcmc_mint_ordinal,
ordinal_scale = TRUE,
point_scale = 6,
seed = seed)
# Section by Section:
ranks_mint_section1_ordinal <-
get_er_from_jags(data = mint_section1,
id_application = "proposal",
id_voter = "voter",
grade_variable = "num_grade",
mcmc_samples = mcmc_mint_section1_ordinal,
ordinal_scale = TRUE,
point_scale = 6,
seed = seed_ordinal)
ranks_mint_section2_ordinal <-
  get_er_from_jags(data = mint_section2,
                   id_application = "proposal",
                   id_voter = "voter",
                   grade_variable = "num_grade",
                   mcmc_samples = mcmc_mint_section2_ordinal,
                   ordinal_scale = TRUE,
                   point_scale = 6,
                   seed = seed_ordinal)
ranks_mint_section3_ordinal <-
  get_er_from_jags(data = mint_section3,
                   id_application = "proposal",
                   id_voter = "voter",
                   grade_variable = "num_grade",
                   mcmc_samples = mcmc_mint_section3_ordinal,
                   ordinal_scale = TRUE,
                   point_scale = 6,
                   seed = seed_ordinal)
ranks_mint_section4_ordinal <-
  get_er_from_jags(data = mint_section4,
                   id_application = "proposal",
                   id_voter = "voter",
                   grade_variable = "num_grade",
                   mcmc_samples = mcmc_mint_section4_ordinal,
                   ordinal_scale = TRUE,
                   point_scale = 6,
                   seed = seed_ordinal)
Figure 2.2 shows the resulting expected ranks for each of the sections separately. The proposals are first ranked based on their average score (Fixed Ranking) and then on the posterior mean of the score. The latter is adjusted for random variation (with a random intercept per proposal) and for referee behavior (through a random intercept per assessor). Remember that each section defines a separate funding line. Note that rank 1 is reserved for the best proposal according to its quality assessment. On top of these figures, we can read off the rankability, which is very high. This means that, given the many proposals that have to be ranked in each section, the Bayesian hierarchical models are able to explain most of the variation; the estimated proposal effects are close to the true effects and less clouded by random variation. This also shows in the figure in that the ERs do not quickly converge to the median rank. The best ERs are 1.47 for section one, 1.27 for section two, 2.24 for section three and 1.08 for section four, all very close to 1. The worst ERs are 86.28 for section one, 92 for section two, 85.89 for section three and 87.19 for section four, all very close to the number of proposals evaluated in the respective section.
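# Thin the MCMC samples before plotting: keep a random 10% of the
# 2 chains x 120,000 iterations = 240,000 draws to speed up the figures below.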
mcmc_mint_section1$samples <-
mcmc_mint_section1$samples[sample(1:240000, 24000),]
mcmc_mint_section2$samples <-
mcmc_mint_section2$samples[sample(1:240000, 24000),]
mcmc_mint_section3$samples <-
mcmc_mint_section3$samples[sample(1:240000, 24000),]
mcmc_mint_section4$samples <-
mcmc_mint_section4$samples[sample(1:240000, 24000),]
mcmc_mint$samples <-
mcmc_mint$samples[sample(1:240000, 24000),]
x1 <- round(.3 * (mint_section1 %>%
pull(proposal) %>%
unique() %>%
length()))
p1 <- plotting_er_results(ranks_mint_section1, title = "Section 1",
result_show = FALSE, how_many_fundable = x1,
draw_funding_line = FALSE)
x2 <- round(.3 * (mint_section2 %>%
pull(proposal) %>%
unique() %>%
length()))
p2 <- plotting_er_results(ranks_mint_section2, title = "Section 2",
result_show = FALSE, how_many_fundable = x2,
draw_funding_line = FALSE)
x3 <- round(.3 * (mint_section3 %>%
pull(proposal) %>%
unique() %>%
length()))
p3 <- plotting_er_results(ranks_mint_section3, title = "Section 3",
result_show = FALSE, how_many_fundable = x3,
draw_funding_line = FALSE)
x4 <- round(.3 * (mint_section4 %>%
pull(proposal) %>%
unique() %>%
length()))
p4 <- plotting_er_results(ranks_mint_section4, title = "Section 4",
result_show = FALSE, how_many_fundable = x4,
draw_funding_line = FALSE)
leg <- cowplot::get_legend(p1)
gridExtra::grid.arrange(p1 +
theme(legend.position = "none",
axis.text.y = element_blank(),
axis.title.x = element_blank()),
p2 +
theme(legend.position = "none",
axis.text.y = element_blank(),
axis.title.x = element_blank()),
p3 +
theme(legend.position = "none",
axis.text.y = element_blank(),
axis.title.x = element_blank()),
p4 +
theme(legend.position = "none",
axis.text.y = element_blank(),
axis.title.x = element_blank()))
Figure 2.2: The proposals to the different sections in the MINT division ranked based on the average overall score (Fixed Ranking), the posterior means and the expected rank (ER). The color code tells us which proposals could be funded (orange) to ensure a 30% success rate.
To better decide which proposals should be funded in each section individually, and where, if needed, a random selection group should be defined, consider Figure 2.3. Looking at the sections separately would lead to a random selection group of 9 proposals in Section 1, which is 10.3% of all proposals evaluated in this section; 5 (55.6%) of these can still be funded. No random selection group is needed for sections 2 and 3, while three more proposals would be selected among the six proposals in the random selection group of section 4.
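The code producing Figure 2.3 is not shown either; a minimal sketch for Section 1, mirroring the plot_er_distributions() calls used for Figure 1.3 above (x1, defined in the previous chunk, is the number of fundable proposals in Section 1):
# Sketch: ER distributions with 50% credible intervals for Section 1.
plot_er_distributions(get_mcmc_samples_result = mcmc_mint_section1,
                      n_proposals = mint_section1 %>%
                        summarise(n_distinct(proposal)) %>% pull(),
                      name_er = "rank_theta", title = "Section 1",
                      number_fundable = x1,
                      outer_show = FALSE, proposal = "")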
Figure 2.3: The ER with 50% credible intervals. The color code shows the final group (Accepted, Rejected or Random Selection).
As the ER methodology is able to account for further structural differences, the proposals can also be ranked all together. When incorporating the information from all four sections in the same model, we have to account for the hierarchical structure induced by the sections. Certainly, there are different dynamics in the sections that might influence the final scores but are independent of the actual quality of the discussed proposals. As can be seen in Table 2.1, the score distributions were not exactly the same in the different sections, with section three giving higher scores on average, even though the sections were constructed so as to be comparable. Therefore, a random intercept for the section can be introduced in the Bayesian hierarchical model to adjust for those dynamics.
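The code for Figure 2.4 is likewise not shown; panel (A) can be sketched with the pooled ranking object from above, funding the 106 (30% of 353) best-ranked proposals:
# Sketch: joint ranking of all MINT proposals, with a 30% success rate.
plotting_er_results(ranks_mint, title = "MINT division, all sections",
                    result_show = FALSE,
                    how_many_fundable = round(.3 * 353),
                    draw_funding_line = TRUE)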
Figure 2.4: (A) All the proposals to the MINT division (from all sections) ranked based on the average overall score (Fixed), the posterior means and the expected rank. The color code tells us which proposals could be funded (orange) to ensure a 30% success rate. (B) shows the same ERs ordered from the best (rank 1) to the worst (rank 353) together with their 50% credible intervals. A naive funding line ensuring a 30% success rate is drawn (blue dashed line).
Figure 2.4 (A) shows the resulting expected ranks. This figure mainly helps to get a first impression of whether the evaluation procedure manages to find substantial differences between the proposals. We can observe the creation of several clusters. The right side of the figure (B) shows the same ERs ordered from the best-ranked proposal (left) to the worst (right) together with their 50% credible intervals. The naive funding line is defined as the ER of the last fundable proposal: the 106th (30% of 353) best-ranked, according to its ER. A zoom into this last figure helps to find a cluster of proposals with credible intervals overlapping the naive funding line, for which a random selection should be applied to decide upon the last remaining proposals to fund. 13 of the proposals in this zoomed part have their credible interval crossing the naive funding line and would therefore form the random selection group. Seven of these 13 proposals (53.8%) will be selected for funding.
Similar results are found when using a Bayesian hierarchical model that explicitly accounts for the ordinal nature of the scores; see Figure 2.5. Figure 2.6 shows the Bland-Altman plot, i.e., the agreement between the BR with an ordinal outcome model and the BR with a continuous outcome model. The mean of all differences (gray horizontal line) is very close to 0, and the differences in ER for most proposals lie within the 95% limits of agreement (red dotted horizontal lines). However, even though both models yield similar results, it is hard to say which modelling approach performs better without knowing exactly which proposals should have been funded or rejected.
Figure 2.5: The ER - computed using an ordinal outcome model - ordered from the best (rank 1) to the worst (rank 353) together with their 50% credible intervals. A naive funding line to ensure a 30% success rate is drawn (blue dashed line).
get_agreement_plot(ranks_mint$rankings,
ranks_mint_ordinal$rankings,
title = "MINT - Project Funding",
ymin = -15, ymax = 15, pt_alpha = .5,
pt_size = .8)
Figure 2.6: Difference in expected rank, depending on whether the outcome is modeled as a continuous or an ordinal variable, plotted against the average ER of the two models, for all proposals evaluated for project funding. The 95% limits of agreement (red dotted lines) and the mean difference (gray line, bias) are plotted.
The more complex the model, the more burn-in and iterations are needed to ensure convergence of the most important parameters. With the option dont_bind set to TRUE in the ERforResearch::get_mcmc_samples() function, we can directly use the MCMC samples to perform convergence diagnostics, such as plotting traceplots. This option simply keeps the individual chains separate (the two chains in our examples) instead of binding them together. Using the bayesplot package, traceplots and other diagnostics can easily be plotted. The following code chunk, for example, shows the traceplots of the θi’s of all proposals in the Biology panel of Postdoc.Mobility.
bayesplot::mcmc_trace(mcmc_ls_b_object$samples,
pars = paste0("application_intercept[", 1:18, "]"))
Figure 3.1: Traceplots of the proposal effects of all 18 proposals in the Biology panel for the Postdoc.Mobility fellowships.
Below we show the R̂ values of the Gelman-Rubin convergence diagnostic for some important parameters of the Social Sciences panel in the Postdoc.Mobility case study. R̂ values substantially above 1 indicate a lack of convergence; Gelman et al. (2014) recommend using 1.1 as a threshold value for R̂.
## PM case study setting
n.iter <- 10000
n.burnin <- 5000
n.chains <- 2
seed <- 1991
seed_ordinal <- seed + 1
## Ordinal models:
# refit the model without pooling the 2 chains
mcmc_hss_s_ordinal_2 <-
get_mcmc_samples(data = hss_s,
id_application = "proposal",
id_voter = "voter",
grade_variable = "num_grade",
n_chains = n.chains, n_iter = n.iter,
n_burnin = n.burnin,
other_variables = "nu",
ordinal_scale = TRUE, point_scale = 6,
path_to_jags_model =
here( "revision", "manuscript",
"jags_model_ordinal_interval.txt"),
seed = seed_ordinal, quiet = TRUE,
dont_bind = TRUE)
# do not perform multivariate computation since this sometimes leads to
# error messages and we do not need that result
# use the whole series, not only the second half (autoburnin = FALSE)
rhat_hss_s <- gelman.diag(mcmc_hss_s_ordinal_2$samples, autoburnin = FALSE,
multivariate = FALSE)
# rhat_est_hss_s <- rhat_hss_s$psrf[ ,1]
# create R_hat values table for Social Sciences panel in PM
n_proposal <- 18
n_voter <- 14
# adapt the row names to notation in the paper
rownames <- c(paste0("$\\theta_{", seq(1, n_proposal), "}$"),
paste0("$\\nu_{", seq(1, n_voter), "}$"),
paste0("rank", "$(\\theta_{", seq(1, n_proposal), "})$"),
"$\\sigma$", "$\\tau$", "$\\tau_{\\nu}$")
table_hss_s <- as.data.frame(cbind(rhat_hss_s$psrf[ ,1]))#, rhat_hss_s$psrf[ ,2]))
row.names(table_hss_s) <- rownames
kable(table_hss_s, digits = 2, escape = FALSE,
col.names = c("$\\hat{R}$"),#, "Upper CI limit"),
row.names = TRUE, booktabs = TRUE,
longtable = TRUE,
caption = "Estimates of $\\hat{R}$, the Gelman-Rubin convergence diagnostic
are given for different parameters in the ordinal model for the Social Sciences panel in the PM case study.") %>%
#, and the upper limits of the 95\\% confidence intervals for $\\hat{R}$
kable_styling(latex_options = c("hold_position", "repeat_header"))
|  | R̂ |
|---|---|
| θ1 | 1.03 |
| θ2 | 1.03 |
| θ3 | 1.01 |
| θ4 | 1.03 |
| θ5 | 1.01 |
| θ6 | 1.01 |
| θ7 | 1.01 |
| θ8 | 1.00 |
| θ9 | 1.00 |
| θ10 | 1.01 |
| θ11 | 1.01 |
| θ12 | 1.00 |
| θ13 | 1.00 |
| θ14 | 1.00 |
| θ15 | 1.00 |
| θ16 | 1.00 |
| θ17 | 1.00 |
| θ18 | 1.00 |
| ν1 | 1.00 |
| ν2 | 1.01 |
| ν3 | 1.00 |
| ν4 | 1.00 |
| ν5 | 1.00 |
| ν6 | 1.00 |
| ν7 | 1.00 |
| ν8 | 1.00 |
| ν9 | 1.00 |
| ν10 | 1.00 |
| ν11 | 1.00 |
| ν12 | 1.00 |
| ν13 | 1.00 |
| ν14 | 1.00 |
| rank(θ1) | 1.00 |
| rank(θ2) | 1.00 |
| rank(θ3) | 1.00 |
| rank(θ4) | 1.00 |
| rank(θ5) | 1.00 |
| rank(θ6) | 1.00 |
| rank(θ7) | 1.00 |
| rank(θ8) | 1.00 |
| rank(θ9) | 1.00 |
| rank(θ10) | 1.00 |
| rank(θ11) | 1.00 |
| rank(θ12) | 1.00 |
| rank(θ13) | 1.00 |
| rank(θ14) | 1.00 |
| rank(θ15) | 1.01 |
| rank(θ16) | 1.01 |
| rank(θ17) | 1.00 |
| rank(θ18) | 1.00 |
| σ | 1.00 |
| τ | 1.02 |
| τν | 1.02 |